load("XSTSF_production.RData")
source('functions.R')
f0_all_ct <- f0_all_pre %>% filter(focus_condition == 'ct' ) %>%
group_by(speaker) %>%
mutate(norm_f0 = scale(log(f0))) %>%
ungroup()
f0_di_ct_lcmh <- f0_all_ct %>%
filter(syntax %in% c('L', 'M') & diortri == 'di') %>%
mutate(sandhi_tone = case_when(sandhi_tone == 'HLLM' ~ 'HMML',
sandhi_tone == 'LLHL' ~ 'LLRF',
.default = sandhi_tone)) %>%
filter(!ind_no %in% c('S2_1_ct', 'S2_11_ct', 'S2_27_ct', 'S3_5_ct', 'S3_19_ct', 'S5_27_ct')) %>%
filter(is.na(sandhi_tone) == FALSE)
f0_di_ct_lcmh_h <- f0_di_ct_lcmh %>% filter( grepl('^H', sync_tone1))
f0_di_ct_lc_h <- f0_di_ct_lcmh_h %>% filter(syntax == 'L')
f0_di_ct_mh_h <- f0_di_ct_lcmh_h %>% filter(syntax == 'M')
f0_di_ct_lcmh_l <- f0_di_ct_lcmh %>% filter( grepl('^[LR]', sync_tone1))
# yinping-initial LC & MH
f0_di_ct_lcmh_hp <- f0_di_ct_lcmh_h %>% filter(hist_tone1 == 'yinping')
ggplotly(draw_by(f0_di_ct_lcmh_hp, 'speaker'), tooltip = c('text', 'x'))
# yinshang-initial LC & MH
f0_di_ct_lcmh_hs <- f0_di_ct_lcmh_h %>% filter(hist_tone1 == 'yinshang')
ggplotly(draw_by(f0_di_ct_lcmh_hs, 'speaker'), tooltip = c('text', 'x'))
# yangping-initial LC & MH
f0_di_ct_lcmh_lp <- f0_di_ct_lcmh_l %>% filter(hist_tone1 == 'yangping')
ggplotly(draw_by(f0_di_ct_lcmh_lp, 'speaker'), tooltip = c('text', 'x'))
# yangshang-initial LC & MH
f0_di_ct_lcmh_ls <- f0_di_ct_lcmh_l %>% filter(hist_tone1 == 'yangshang')
ggplotly(draw_by(f0_di_ct_lcmh_ls, 'speaker'), tooltip = c('text', 'x'))
unique(f0_di_ct_lcmh_h$sandhi_tone) # check the labels
## [1] "HMML" "MHHL" "MMMH" "HHHH"
p_cluster(f0_di_ct_lcmh_h, sandhi_tone)
# monosyllabic tone (initial tone)
distri_count(f0_di_ct_lcmh_h, speaker, sync_tone1)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
distri_count(f0_di_ct_lcmh_h, hist_tone1, sync_tone1)
# by second tone
distri_count(f0_di_ct_lcmh_h, sync_tone2, sandhi_tone)
distri_count(f0_di_ct_lcmh_h, hist_tone2, sandhi_tone)
# by first tone
distri_count(f0_di_ct_lcmh_h, sync_tone1, sandhi_tone)
distri_count(f0_di_ct_lcmh_h, hist_tone1, sandhi_tone)
# by speaker
distri_count(f0_di_ct_lcmh_hp, speaker, sandhi_tone)
distri_count(f0_di_ct_lcmh_hs, speaker, sandhi_tone)
# by item
distri_count(f0_di_ct_lcmh_hp, citation_no, sandhi_tone)
distri_count(f0_di_ct_lcmh_hs, citation_no, sandhi_tone)
# by syntax
distri_count(f0_di_ct_lcmh_h, syntax, sandhi_tone)
distri_count(f0_di_ct_lcmh_h, speaker, sync_tone1, sandhi_tone)
Count vs. percentage
distri_count(f0_di_ct_lcmh_h, sync_tone2, sandhi_tone)
distri_prop(f0_di_ct_lcmh_h, sync_tone2, sandhi_tone)
p_cluster(f0_di_ct_lcmh_h, sandhi_tone, 'speaker')
p_cluster(f0_di_ct_lc_h, sandhi_tone, 'speaker')+ggtitle('Lexical compounds')
p_cluster(f0_di_ct_mh_h, sandhi_tone, 'speaker')+ggtitle('Adjective-Noun phrases')
# first tone: synchronic categories
p_sub_cluster(f0_di_ct_lcmh_h, sync_tone1, sandhi_tone)
p_sub_cluster(f0_di_ct_lc_h, sync_tone1, sandhi_tone)+ggtitle('Lexical compounds')
p_sub_cluster(f0_di_ct_mh_h, sync_tone1, sandhi_tone)+ggtitle('Adjective-Noun phrases')
# second tone [synchronic] & syntax
p_sub_cluster(f0_di_ct_lcmh_h, sync_tone2, sandhi_tone)
p_sub_cluster(f0_di_ct_lc_h, sync_tone2, sandhi_tone)+ggtitle('Lexical compounds')
p_sub_cluster(f0_di_ct_mh_h, sync_tone2, sandhi_tone)+ggtitle('Adjective-Noun phrases')
p_cluster() vs. p_sub_cluster()
p_sub_cluster(f0_di_ct_lcmh_h, sync_tone1, sandhi_tone)
p_cluster(f0_di_ct_lcmh_h, sync_tone1, 'sandhi_tone')
# data preparation
f0_di_ct_lcmh_h_kmeans <- f0_di_ct_lcmh_h %>%
select(-diortri, -syllable_no, -focus_no, -f0) %>%
spread(time, norm_f0)
# k-means clustering
cluster_model <- k_means_clustering(f0_di_ct_lcmh_h_kmeans)
kml(cluster_model, nbClusters = 2:10)
## ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
kml::plot(cluster_model, 4, parTraj=parTRAJ(col="clusters"))
# get cluster results
f0_di_ct_lcmh_h_kmeans <- f0_di_ct_lcmh_h_kmeans %>%
mutate(cluster4 = getClusters(cluster_model, 4),
sub_cluster = paste0(sandhi_tone, '_', cluster4))
# heatmap distribution
cluster_solution <- wide_to_long(f0_di_ct_lcmh_h_kmeans)
heatmap_df <- heatmap_data(cluster_solution, cluster4)
compare_cluster(heatmap_df, 'cluster4')
Examine mismathes
cluster_hhhh <- cluster_solution %>% filter(sandhi_tone == 'HHHH')
ggplotly(p_cluster(cluster_hhhh, sub_cluster), tooltip = c('text', 'x'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cluster_hmml <- cluster_solution %>% filter(sandhi_tone == 'HMML')
ggplotly(p_cluster(cluster_hmml, sub_cluster), tooltip = c('text', 'x'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 12 rows containing non-finite values (`stat_summary()`).
cluster_mhhl <- cluster_solution %>% filter(sandhi_tone == 'MHHL')
ggplotly(p_cluster(cluster_mhhl, sub_cluster), tooltip = c('text', 'x'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 2 rows containing non-finite values (`stat_summary()`).
cluster_mmmh <- cluster_solution %>% filter(sandhi_tone == 'MMMH')
ggplotly(p_cluster(cluster_mmmh, sub_cluster), tooltip = c('text', 'x'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.